---
title: "Experience Exceeds Awareness of Anthropogenic Climate Change in Greenland"
journal: "Nature Climate Change"
author: "Kelton Minor, Manumina Lund-Jensen, Lawrence Hamilton, Mette Bendixen, David Dreyer Lassen, Minik T. Rosing"
code contact: "Kelton Minor, kelton dot minor at columbia dot edu"
date: "2023-04-16"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
#Load packages
library(sf)
library(lwgeom)
library(fst)
library(bit64) 
library(ggthemes) 
library(ggmap) 
library(ggplot2) 
library(Hmisc) 
library(RColorBrewer) 
library(stargazer) 
library(lubridate)
library(zoo) 
library(reshape2)
library(sp) 
library(raster) 
library(maptools) 
library(rgeos) 
library(rgdal) 
library(RCurl) 
library(data.table) 
library(stringr) 
library(viridis) 
library(readxl)
library(survey)
library(SDaA)
library(rnoaa)
library(lutz)
library(weathermetrics)
library(stringdist)
library(humidity)
library(survey)
library(modelsummary)
library(flextable)
library(MetBrewer)
library(remotes)
```


```{r}
#Set path to your directory
pathdir <-"/Home/Analysis/Replication_Files/Replication_Data/"
```


#Load Arctic Survey Data, Plot Figs
```{r}
#Merge world map country names data with countries included in prior international climate change surveys (2010-2021)
#Load world map country names data
map_names<-fread(paste0(pathdir,"basemap_names.csv"),header=T)
map_names<-map_names[,2]
setnames(map_names,old="V1",new="Country")
#Load intl survey country names data
svy_names<-fread(paste0(pathdir,"2010_2020_Global_Climate_Survey_Coverage.csv"),header=T)

tcount<-merge(svy_names,map_names,by=c("Country"))
rm(svy_names,map_names)
gc()

#create international survey appearance count var (sum by country)
tcount<-unique(tcount[,.(count=.N),by=.(Country)])
tcount

median(tcount$count)
mean(tcount$count)
```

```{r}
#Fig 1A - Plot count of country-level coverage across prior international climate surveys

#Load shape files
wld <- sf::st_read(paste0(pathdir,"Shapefiles/detailed/detailed_2013.shp"))

#convert tcount to log
wld$CNTRY_TERR

#keep wld countries, merge tweet count
wld<-left_join(wld, tcount, by = c("CNTRY_TERR"="Country"))

#Define Projection
#Lambert Azimuthal Equal-Area
laea_proj<-"+proj=laea +lat_0=75 +lon_0=-40 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"

#transform to desired projection
world_laea = lwgeom::st_transform_proj(wld, crs = laea_proj)

#Lambert Azimuthal Equal-Area
wld_plot<-ggplot(world_laea) + 
  geom_sf(aes(fill = count), alpha=1, lwd = 0, col=alpha("white",.1)) +
  scale_fill_gradientn(colors = met.brewer("Hokusai2"),na.value="orange") +
  coord_sf(datum = NA) +
  theme_minimal() +
  labs(title = "", subtitle = "")
wld_plot

ggsave(paste0(pathdir,"/Figure_components/Fig_1A_International_Climate_Survey_Count.pdf"))

rm(world_laea,wld_plot)
gc()
```

```{r}
#Fig 1B,C,D - Plot prior Arctic National Climate Perception Survey Responses (2015-2021)

#B Beliefs
#Load Arctic Climate Change Beliefs + Experience National Survey Estimates Data
arct<-fread(paste0(pathdir,"2010_2020_Arctic_Nation_Climate_Survey_Responses.csv"),header=T)
#Limit to obs with belief data
arct<-arct[!is.na(Belief_Yes)]
#Quick plot of knowledge of climate change (Belief Yes)
ggplot(arct, aes(x=Belief_Yes, y=Country,alpha=Date)) + 
    geom_point(size=3, color="#6600CC") + scale_x_continuous(breaks=seq(0,100,10),limits=c(0,100)) + theme_minimal()

#compute 2018-2020 avg. by Arctic Nation
arct[as.Date(Date)>=as.Date("2018-01-01"),Belief_Yes_Avg:= mean(Belief_Yes,na.rm=T),by=.(Country)]
arct[,Belief_Yes_Avg:= mean(Belief_Yes,na.rm=T),by=.(Country)]
#create dt of Arctic Nation average was over 2018-2020 period 
test<-arct[,.(AVG=mean(Belief_Yes_Avg,na.rm=T)),by=.(Country)]
test_AVG<-mean(test$AVG,na.rm=T)
test
test_AVG

unique(arct[,.(Country)])
#compute 2018-2020 avg. by top five oil producing Arctic Nations
#https://ourworldindata.org/grapher/oil-production-by-country?tab=table
test<-test[(Country=="United States of America"|Country=="Russian Federation"|Country=="Canada"|Country=="Norway"|Country=="Denmark")]
#create dt of Arctic top oil producing nation average over 2018-2020 period
test_AVG_oil<-mean(test$AVG,na.rm=T)
test_AVG_oil
test

#create label for plotting 2018-2020 averages
arct[,max_date:=max(as.Date(Date)),by=.(Country)]
arct[,Belief_Yes_Avg_Label:= NaN]
arct[as.Date(Date)==max_date,Belief_Yes_Avg_Label:= Belief_Yes_Avg]

#plot 2018-2020 average by country
ac<-ggplot(arct, aes(x=Belief_Yes, y=Country,alpha=Date)) + 
    geom_point(size=2.5, color="#6600CC") + scale_x_continuous(breaks=seq(0,100,10),limits=c(10,100)) + theme_minimal()
ac + geom_point(aes(Belief_Yes_Avg),color="orange",size=2.5) + 
  geom_text(
    label=round(arct$Belief_Yes_Avg_Label,0), 
    nudge_x = 0.25, nudge_y = 0.25, 
    check_overlap = T
  ) + 
  geom_vline(xintercept=test_AVG,color="orange",alpha=.5) + #add 2018-2020 Arctic Nation avg.
  geom_vline(xintercept=test_AVG_oil,color="black",alpha=.5)  #add 2018-2020 Top Oil Producing Nation avg.

#save plot
ggsave(paste0(pathdir,"Figure_components/Fig_1B_Arctic_Nations_Belief.pdf"))

rm(arct,test,test_AVG,test_AVG_oil,ac)
gc()

#C Knowledge of human-caused climate change
#Load Arctic Climate Change Beliefs + Experience National Survey Estimates Data
arct<-fread(paste0(pathdir,"2010_2020_Arctic_Nation_Climate_Survey_Responses.csv"),header=T)
#Limit to obs with data on climate change knowledge
arct<-arct[!is.na(Human_Cause)]
#Quick plot of knowledge of climate change
ggplot(arct, aes(x=Human_Cause, y=Country,alpha=Date)) + 
    geom_point(size=3, color="blue") + scale_x_continuous(breaks=seq(0,100,10),limits=c(0,100)) + theme_minimal()

#compute 2018-2020 avg. by Arctic Nation
arct[as.Date(Date)>=as.Date("2018-01-01"),Human_Cause_Avg:= mean(Human_Cause,na.rm=T),by=.(Country)]
arct[,.(Country,Date,Human_Cause,Human_Cause_Avg)]
arct[,Human_Cause_Avg:= mean(Human_Cause,na.rm=T),by=.(Country)]
#determine what Arctic Nation average was over 2018-2020 period
test<-arct[,.(AVG=mean(Human_Cause_Avg,na.rm=T)),by=.(Country)]
test_AVG<-mean(test$AVG,na.rm=T)
test

#compute 2018-2020 avg. by top five oil producing Arctic Nations
#https://ourworldindata.org/grapher/oil-production-by-country?tab=table
test<-test[(Country=="United States of America"|Country=="Russian Federation"|Country=="Canada"|Country=="Norway"|Country=="Denmark")]
#create dt of Arctic top oil producing nation average over 2018-2020 period
test_AVG_oil<-mean(test$AVG,na.rm=T)
test_AVG_oil
test

#create label for plotting 2018-2020 averages
arct[,max_date:=max(as.Date(Date)),by=.(Country)]
arct[,Human_Cause_Avg_Label:= NaN]
arct[as.Date(Date)==max_date,Human_Cause_Avg_Label:= Human_Cause_Avg]

#plot 2018-2020 average by country
ac<-ggplot(arct, aes(x=Human_Cause, y=Country,alpha=Date)) + 
    geom_point(size=2.5, color="blue") + scale_x_continuous(breaks=seq(0,100,10),limits=c(10,100)) + theme_minimal()
ac + geom_point(aes(Human_Cause_Avg),color="orange",size=2.5) + 
  geom_text(
    label=round(arct$Human_Cause_Avg_Label,0), 
    nudge_x = 0.25, nudge_y = 0.25, 
    check_overlap = T
  ) + 
  geom_vline(xintercept=test_AVG,color="orange",alpha=.5) + #add 2018-2020 avg.  
  geom_vline(xintercept=test_AVG_oil,color="black",alpha=.5) + #add 2018-2020 Top Oil Producing Nation avg.
  geom_vline(xintercept=99,color="red",alpha=.5) #Lynas et al. 2021 estimate of scientific consensus
#save plot
ggsave(paste0(pathdir,"Figure_components/Fig_1C_Arctic_Nations_Human_Cause.pdf"))

names(arct)
rm(arct,test,test_AVG,test_AVG_oil,ac)
gc()


#D Experience of climate change
#Load Arctic Climate Change Beliefs + Experience National Survey Estimates Data
arct<-fread(paste0(pathdir,"2010_2020_Arctic_Nation_Climate_Survey_Responses.csv"),header=T)
#Limit to obs with experience data
arct<-arct[!is.na(Experience)]
#Quick plot of experience of climate change 
ggplot(arct, aes(x=Experience, y=Country,alpha=Date)) + 
    geom_point(size=3, color="#00CC99") + scale_x_continuous(breaks=seq(0,100,10),limits=c(0,100)) + theme_minimal()

#compute 2018-2020 avg. by Arctic Nation
arct[as.Date(Date)>=as.Date("2018-01-01"),Experience_Avg:= mean(Experience,na.rm=T),by=.(Country)]
arct[,.(Country,Date,Experience,Experience_Avg)]
arct[,Experience_Avg:= mean(Experience_Avg,na.rm=T),by=.(Country)]
arct[,.(Country,Date,Experience,Experience_Avg)]
#determine what Arctic Nation average was over 2018-2020 period
test<-arct[,.(AVG=mean(Experience_Avg,na.rm=T)),by=.(Country)]
test_AVG<-mean(test$AVG,na.rm=T)
test

#compute 2018-2020 avg. by top five oil producing Arctic Nations
#https://ourworldindata.org/grapher/oil-production-by-country?tab=table
test<-test[(Country=="United States of America"|Country=="Russian Federation"|Country=="Canada"|Country=="Norway"|Country=="Denmark")]
#create dt of Arctic top oil producing nation average over 2018-2020 period
test_AVG_oil<-mean(test$AVG,na.rm=T)
test_AVG_oil
test

#create label for plotting 2018-2020 averages
arct[,max_date:=max(as.Date(Date)),by=.(Country)]
arct[,Experience_Avg_Label:= NaN]
arct[as.Date(Date)==max_date,Experience_Avg_Label:= Experience_Avg]

#plot 2018-2020 average by country
ac<-ggplot(arct, aes(x=Experience, y=Country,alpha=Date)) + 
    geom_point(size=2.5, color="#00CC99") + scale_x_continuous(breaks=seq(0,100,10),limits=c(10,100)) + theme_minimal()
ac + geom_point(aes(Experience_Avg),color="orange",size=2.5) + 
  geom_text(
    label=round(arct$Experience_Avg_Label,0), 
    nudge_x = 0.25, nudge_y = 0.25, 
    check_overlap = T
  ) + 
  geom_vline(xintercept=test_AVG,color="orange",alpha=.5) + #add 2018-2020 avg. 
  geom_vline(xintercept=test_AVG_oil,color="black",alpha=.5) #add 2018-2020 Top Oil Producing Nation avg.

#save plot
ggsave(paste0(pathdir,"Figure_components/Fig_1D_Arctic_Nations_Experience.pdf"))

names(arct)
rm(arct,test,test_AVG,test_AVG_oil,ac)
gc()

```

```{r}
#Load privacy-preserving combined GPS 2018-19 and IPS 2020-21 survey data
load(paste0(pathdir,"GPS_IPS_Combined_Survey_Data.RData"))
setDT(CS)
```


```{r}
#Compute demographic table

#specify demographics to summarize
#"Age_Group","Gender_Group","Region","Town_Settlement"

CS[,.N] #check combined GPS + IPS sample total
sample_total<-1585 #combined sample total

CS[,wts:=as.numeric(wts)]
CS[,sum(wts)]
pop_total<-78910 #2018 + 2020 combined population total from Statistics Greenland

#compute combined unweighted total, unweighted percentage, weighted percentage 
d1<-CS[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Age_Group)]
setnames(d1,"Age_Group","Demographic_Group")
d2<-CS[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Gender_Group)]
setnames(d2,"Gender_Group","Demographic_Group")
d3<-CS[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Region)]
setnames(d3,"Region","Demographic_Group")
d4<-CS[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Town_Settlement)]
setnames(d4,"Town_Settlement","Demographic_Group")
new<-list(d1,d2,d3,d4,d5)
demo_table<-rbindlist(new,use.names=T,fill=T,idcol ="Demographic_Group")
demo_table[,pct:=round(pct*100,1)]
demo_table[,pct.wt:=round(pct.wt*100,1)]
demo_table
```


```{r}
#Figs 2 & 3 - Plot Greenland region-specific and demographic-specific awareness of anthropogenic climate change and climate change experience population-weighted estimates and color map/bar plots using R survey package

#define weights as numeric vector
CS[,wts:=as.numeric(wts)]
#create total population col
CS[,N:=sum(wts)]

#Create combined survey object
CS.d<- svydesign(id = ~1, data = CS, fpc = ~N, weights = ~wts)

#Awareness of human-caused climate change & climate change experience
#Create weighted grouped population estimate barplots for each subgroup item w/ confidence intervals
#By location scale
#By age group
#By self-ID gender group
#By education
#By Indigenous identity 
#By subsistence occupation

cols <- c("AACC","experience") #survey items to plot
cols_2 <- c("Region","locality_scale","Age_Group","Gender_Group","GL_identity","postpri_education","subsistence_occupation") #factors for subgroup analysis 


for (var in cols){
  for (var_2 in cols_2){
  gmean <- svyby(as.formula(paste0( "~" , var)), as.formula(paste0( "~" , var_2)), design = CS.d, na.rm=T, svymean, vartype=c("se"))
  
  #population grouped bar plot (grouped by var2)
  #format output to DT
  gmean_plot <- as.data.frame(gmean)
  setDT(gmean_plot)
  #convert cols to percent
  cols <- names(gmean_plot)
  cols <- gsub(paste0(var), "", cols)
  setnames(gmean_plot, old=names(gmean_plot), new = cols)
  cols<-cols[-1] #*** don't include ID col (assumes first col is ID)
  gmean_plot=gmean_plot[,(cols):=lapply(.SD, function(x) x*100), .SDcols = cols]
  gmean_plot=gmean_plot[,(cols):=lapply(.SD,function(x) round(x, digits=1)), .SDcols = cols]
  
  #convert to long format for plotting with CIs
  #extract standard error cols ***NOTE: extracts all cols with periods and "se" in col 
  gmean_se <- gmean_plot[,.SD,.SDcols = (names(gmean_plot) %like% "\\." & names(gmean_plot) %like% "se") | names(gmean_plot) == var_2]
  #extract standard error cols ***NOTE: extracts all cols without periods and "se" in col 
  gmean_names <- names(gmean_plot[,.SD,.SDcols = !names(gmean_se)])
  gmean_se <- melt(gmean_se, id.vars= var_2, variable.name = "answers")
  setnames(gmean_se, "value", "SE")
  gmean_plot <- melt(gmean_plot, id.vars= var_2, measure.vars = gmean_names, variable.name = "answers")
  setnames(gmean_plot, "value", "percent")
  gmean_se[,answers:=gsub("se\\.","",answers)]
  gmean_plot[,answers:=as.character(answers)]
  #merge reshaped cols 
  nrow(gmean_se)
  nrow(gmean_plot)
  gmean_plot<-merge(gmean_plot,gmean_se,by=c(var_2,"answers"))
  nrow(gmean_plot)
  
  #grouped bar plot
  gpop<-ggplot(data=gmean_plot, aes_string(x=var_2, y="percent", fill="answers")) + 
  geom_bar(position="dodge", stat="identity", color="white", width=0.8) + 
  geom_errorbar(aes(ymin=percent-SE*1.96, ymax=percent+SE*1.96), position=position_dodge(.8), width=.1, size = .8, alpha = .4) +
  geom_text(aes(label=percent), hjust=-1, position=position_dodge(width=0.8), size=3) +
  labs(x=NULL) +
  scale_y_continuous(breaks=seq(0, 100, 25)) + expand_limits(y=c(0, 100)) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  coord_flip() + theme(panel.background = element_blank()) +
  ggtitle(paste0(var), subtitle=paste0(var, "_", var_2))
  gpop
  
  #save plots 
  write.csv(gmean_plot, file = paste0(pathdir,"/Figure_components/","Fig_2_3_",var,"_",var_2,"_CS.csv"))
  ggsave(paste0(pathdir,"Figure_components/","Fig_2_3_",var,"_",var_2,"_CS.pdf"))
  ggsave(paste0(pathdir,"Figure_components/","Fig_2_3_",var,"_",var_2,"_CS.png"))
  }}
```

```{r}
#create Regional deviation from Greenland average plots
region_ex <-fread(paste0(pathdir,"/Figure_components/Fig_2_3_experience_Region_CS.csv"),header=T)
region_ac <-fread(paste0(pathdir,"/Figure_components/Fig_2_3_AACC_Region_CS.csv"),header=T)

#compute Greenland weighted averages
nation_ex<-svymean(~experience, design = CS.d, na.rm=T)
nation_ac<-svymean(~AACC, design = CS.d, na.rm=T)

#convert to dt
nation_ex <- as.data.frame(nation_ex,row.names = "exp")
setDT(nation_ex,row.names(T))
nation_ac <- as.data.frame(nation_ac,row.names = T)
setDT(nation_ac,row.names(T))

#convert cols to percent
cols <- names(nation_ex)
cols<-cols[-1] #*** don't include ID col (assumes first col is ID)
nation_ex=nation_ex[,(cols):=lapply(.SD, function(x) x*100), .SDcols = cols]
nation_ex=nation_ex[,(cols):=lapply(.SD,function(x) round(x, digits=1)), .SDcols = cols]

cols <- names(nation_ac)
cols<-cols[-1] #*** don't include ID col (assumes first col is ID)
nation_ac=nation_ac[,(cols):=lapply(.SD, function(x) x*100), .SDcols = cols]
nation_ac=nation_ac[,(cols):=lapply(.SD,function(x) round(x, digits=1)), .SDcols = cols]

#add national average to regional average dts
region_ex[,nation_percent:=nation_ex[rn=="experience1",mean]]
region_ac[,nation_percent:=nation_ac[rn=="AACC1",mean]]

#subset answer rows (% experience, %aware of anthro climate change)
#subtract national average from regional averages to compute deviations
region_ex<-region_ex[answers==1][,deviation:=percent - nation_percent]
region_ac<-region_ac[answers==1][,deviation:=percent - nation_percent]

#Plot on divergent color scale
#Experience
#set scale midpoint
scale_mid<-region_ex[,min(deviation)/(max(deviation)-min(deviation))]
region_ex[,.(max(deviation),min(deviation))]

ggplot(region_ex, aes(x = reorder(Region, deviation), y = deviation)) + 
  geom_bar(stat = "identity",
           show.legend = T,
           aes(fill = deviation)) + 
  xlab("Region") +
  ylab("Deviation") +
  scale_fill_gradientn(
    colours = c("#9b00ff", "aliceblue", "#00ceb5"),
    values = c(0, abs(scale_mid), 1),
    breaks = seq(-8,8,2) #set breaks
  ) + 
  scale_y_continuous(expand = c(0, 0)) + theme_minimal()

ggsave(paste0(pathdir,"/Figure_components/Fig2C_exp_map_colors.pdf"))#save
#note: plotted color values assigned to 2021 Statistics Greenland regional shapefiles in Adobe Illustrator. 

#Awareness of anthropogenic climate change
#set scale midpoint
scale_mid<-region_ac[,min(deviation)/(max(deviation)-min(deviation))]

ggplot(region_ac, aes(x = reorder(Region, deviation), y = deviation)) + 
  geom_bar(stat = "identity",
           show.legend = T,
           aes(fill = deviation)) + 
  xlab("Group") +
  ylab("Value") +
  scale_fill_gradientn(
    colours = c("#ffa500", "#ebebeb", "#0d3a5a"),
    values = c(0, abs(scale_mid), 1),
    breaks = seq(-20,10,5) #set breaks
  ) + 
  scale_y_continuous(expand = c(0, 0)) + theme_minimal()

ggsave(paste0(pathdir,"/Figure_components/Fig2E_map_colors.pdf"))#save
#note: plotted color values assigned to Statistics Greenland regional shapefiles in Adobe Illustrator. 

```

```{r}
#load psych package version 2.2.3 for factor analysis 
install_version("psych", "2.2.3")
library(psych)
packageVersion("psych")
```


```{r}
#Factor analysis 

#Characteristics of respondents by community scale (village, town, city)

#Factor Analysis - PCA Method
#Prepare variables for factor analysis: convert select cols to numeric
cols<-c("Town_Settlement","NO_postpri_education","GL_identity","subsistence_occupation")
CS[,(cols):=lapply(.SD,as.character),.SDcols=cols]
CS[,(cols):=lapply(.SD,as.numeric),.SDcols=cols]

str(CS)

#specify factor analysis parameters
CS.fa<-CS[,.(Town_Settlement,NO_postpri_education,GL_identity,subsistence_occupation)]
cols<names(CS.fa)
CS.fa[,.(Town_Settlement,NO_postpri_education,GL_identity,subsistence_occupation)]

#parallel analysis scree plots to select factor(s)
fa.parallel(CS.fa,n.obs=NULL,fm="minres",fa="pc",nfactors=3,
main="Parallel Analysis Scree Plots",
n.iter=20,error.bars=TRUE,se.bars=FALSE,SMC=FALSE,ylabel=NULL,show.legend=TRUE,
sim=TRUE,quant=.95,cor="tet",use="pairwise",plot=TRUE,correct=.5)
#Note: Supplementary Fig. 1

#select first factor based on parallel analysis (from above)
principal(CS.fa[,1:4], nfactors = 3, rotate = 'none', cor = "tet")
CS.fa.cor <- principal(CS.fa[,1:4], nfactors = 1, rotate = 'none', cor = "tet")
CS.fa.cor

#Extract scoring coefficients
scores<-psych::factor.scores(CS.fa,CS.fa.cor,impute="median") #use regression scoring method with imputation

scores[2] #plot weights (scoring coef)

CS[,culture:=scores[1]] #add predicted factor scores
CS[,.(Region,Town_Settlement,NO_postpri_education,GL_identity,subsistence_occupation,culture)]

#Create binned culture variable
CS[,culture:=round(culture,2)]
CS[,culture_level:=cut(culture,breaks=c(-Inf,seq(-1,1,1),Inf))] #break culture factor into 4 levels across marginal distribution of IC factor (low,medium low,medium high,high)
CS[,culture_level:=plyr::revalue(culture_level, c("(-Inf,-1]"="1_low","(-1,0]"="2_med_low","(0,1]"="3_med_high","(1, Inf]"="4_high"))]

#Prepare variables for factor analysis: convert select cols to factor
cols<-c("Town_Settlement","NO_postpri_education","GL_identity","subsistence_occupation","culture_level")
CS[,(cols):=lapply(.SD,as.character),.SDcols=cols]
CS[,(cols):=lapply(.SD,as.factor),.SDcols=cols]

#Create survey binary
CS[,survey:="1_IPS"]
CS[1:646,survey:="2_GPS"]
CS[,survey:=as.factor(survey)]

#Update survey object with culture factor + survey binary
CS.d<- svydesign(id = ~1, data = CS, fpc = ~N, weights = ~wts)
```

Principal Components Analysis
Call: principal(r = CS.fa[, 1:4], nfactors = 3, rotate = "none", cor = "tet")
Standardized loadings (pattern matrix) based upon correlation matrix

                       PC1  PC2  PC3
SS loadings           2.10 0.72 0.64
Proportion Var        0.52 0.18 0.16
Cumulative Var        0.52 0.70 0.86
Proportion Explained  0.61 0.21 0.19
Cumulative Proportion 0.61 0.81 1.00

Mean item complexity =  1.8
Test of the hypothesis that 3 components are sufficient.

The root mean square of the residuals (RMSR) is  0.11 
 with the empirical chi square  245.19  with prob <  NA 

Fit based upon off diagonal values = 0.9

Principal Components Analysis
Call: principal(r = CS.fa[, 1:4], nfactors = 1, rotate = "none", cor = "tet")
Standardized loadings (pattern matrix) based upon correlation matrix

                PC1
SS loadings    2.10
Proportion Var 0.52

Mean item complexity =  1
Test of the hypothesis that 1 component is sufficient.

The root mean square of the residuals (RMSR) is  0.16 
 with the empirical chi square  479.93  with prob <  6.1e-105 

Fit based upon off diagonal values = 0.81$weights
                             PC1
Town_Settlement        0.4691467
NO_postpri_education   0.4667406
GL_identity            0.4893375
subsistence_occupation 0.5182586



```{r}
#Plot demographic characteristics by location

#By urban scale
cols <- c("Age_Group","Gender_Group","GL_identity","NO_postpri_education","subsistence_occupation") #survey items to plot
cols_2 <- c("locality_scale") #factors for subgroup analysis 

for (var in cols){
  for (var_2 in cols_2){
  #population grouped bar plot (grouped by var2)
  gmean <- svyby(as.formula(paste0( "~" , var)), as.formula(paste0( "~" , var_2)), design = CS.d, na.rm=T, svymean, vartype=c("se"))
  
  #format output to DT
  gmean_plot <- as.data.frame(gmean)
  setDT(gmean_plot)
  #convert cols to percent
  cols <- names(gmean_plot)
  cols <- gsub(paste0(var), "", cols)
  setnames(gmean_plot, old=names(gmean_plot), new = cols)
  cols<-cols[-1] #*** don't include ID col (assumes first col is ID)
  gmean_plot=gmean_plot[,(cols):=lapply(.SD, function(x) x*100), .SDcols = cols]
  gmean_plot=gmean_plot[,(cols):=lapply(.SD,function(x) round(x, digits=1)), .SDcols = cols]
  
  #convert to long format for plotting with CIs
  #extract standard error cols ***NOTE: extracts all cols with periods and "se" in col 
  gmean_se <- gmean_plot[,.SD,.SDcols = (names(gmean_plot) %like% "\\." & names(gmean_plot) %like% "se") | names(gmean_plot) == var_2]
  #extract standard error cols ***NOTE: extracts all cols without periods and "se" in col 
  gmean_names <- names(gmean_plot[,.SD,.SDcols = !names(gmean_se)])
  gmean_se <- melt(gmean_se, id.vars= var_2, variable.name = "answers")
  setnames(gmean_se, "value", "SE")
  gmean_plot <- melt(gmean_plot, id.vars= var_2, measure.vars = gmean_names, variable.name = "answers")
  setnames(gmean_plot, "value", "percent")
  gmean_se[,answers:=gsub("se\\.","",answers)]
  gmean_plot[,answers:=as.character(answers)]
  #merge reshaped cols 
  nrow(gmean_se)
  nrow(gmean_plot)
  gmean_plot<-merge(gmean_plot,gmean_se,by=c(var_2,"answers"))
  nrow(gmean_plot)
  
  #gmean_plot[,Region:=factor(Region,levels=c("Avannaata","Qeqertalik","Qeqqata","East Sermersooq","West Sermersooq","Kujalleq"))]

  gpop<-ggplot(data=gmean_plot, aes_string(x="answers", y="percent", fill=var_2)) + 
    geom_bar(position="dodge", stat="identity") +
    scale_fill_viridis(discrete = T, option = "D") + 
    #geom_text(aes(label=percent), hjust=.5, position=position_dodge(width=1), size=3) +
    labs(x=NULL) +
    scale_y_continuous(breaks=seq(0, 100, 25)) + expand_limits(y=c(0, 100)) +
    theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
    theme_minimal() +
    ggtitle(paste0(var), subtitle=paste0(var, "_", var_2))
    gpop
  
  #save plots 
  write.csv(gmean_plot, file = paste0(pathdir,"/Figure_components/Fig_4A_",var,"_",var_2,"_Demo_Scale_CS.csv"))
  ggsave(paste0(pathdir,"/Figure_components/Fig_4A_",var,"_",var_2,"_Demo_Scale_CS.pdf"))
  ggsave(paste0(pathdir,"/Figure_components/Fig_4A_",var,"_",var_2,"_Demo_Scale_CS.png"))
  }}
```


#Logit regression analyses + plots
```{r}
#Run weighted logit regressions and output model summary tables
#IV. Primary Specification: Culture factor, age, sex

#A. Awareness of Anthropogenic Climate Change Primary Regressions
#Output combined model summary tables - Add terms
models <- list(
  "svy:logit climatecause ~ age_group + sex + culture" = svyglm(AACC ~ Age_Group + Gender_Group + culture_level, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit climatecause ~ age_group + sex + culture + region" = svyglm(AACC ~ Age_Group + Gender_Group + culture_level + Region, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit climatecause ~ age_group + sex + culture + region + survey" = svyglm(AACC ~ Age_Group + Gender_Group + culture_level + Region + survey, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit climatecause ~ full model + cultural subfactors" = svyglm(AACC ~ Age_Group + Gender_Group + Region + Town_Settlement + postpri_education + GL_identity + subsistence_occupation, family=quasibinomial, design=CS.d, na.action=na.omit))

#print table with CIs
modelsummary(models, exponentiate = TRUE, fmt = 2,
             estimate = c("{estimate} ({conf.low}, {conf.high}){stars}"),
             notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
             statistic = NULL)
#save table
modelsummary(models, exponentiate = TRUE, fmt = 2,
             estimate = c("{estimate} ({conf.low}, {conf.high}){stars}"),
             notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
             statistic = NULL,
             output = paste0(pathdir,"Figure_components/Fig4_AACC_model_output_1_table_full.docx")
             )

#B. Experience of Climate Change Regressions
#Output combined model summary tables - Add terms
models <- list(
  "svy:logit EXP ~ age_group + sex + culture" = svyglm(experience ~ Age_Group + Gender_Group + culture_level, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit EXP ~ age_group + sex + culture + region" = svyglm(experience ~ Age_Group + Gender_Group + culture_level + Region, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit EXP ~ age_group + sex + culture + region + survey" = svyglm(experience ~ Age_Group + Gender_Group + culture_level + Region + survey, family=quasibinomial, design=CS.d, na.action=na.omit),
  "svy:logit EXP ~ full model + cultural subfactors" = svyglm(experience ~ Age_Group + Gender_Group + Region + Town_Settlement + postpri_education + GL_identity + subsistence_occupation, family=quasibinomial, design=CS.d, na.action=na.omit))

#print table with CIs
modelsummary(models, exponentiate = TRUE, fmt = 2,
             estimate = c("{estimate} ({conf.low}, {conf.high}){stars}"),
             notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
             statistic = NULL)
#save table
modelsummary(models, exponentiate = TRUE, fmt = 2,
             estimate = c("{estimate} ({conf.low}, {conf.high}){stars}"),
             notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
             statistic = NULL,
             output = paste0(pathdir,"Figure_components/Fig4_EXP_model_output_1_table_full.docx"))
```


```{r}
#Fig 4 - Odds Ratio Plots

#Compare awareness of human-caused climate change and climate change experience across cultural dimension 

#Plot odds ratios
#specify model outcomes for multi-model odds ratio plot
vars <- c("AACC","experience") #DVs
vars_2 <- c("culture_level","Age_Group") #specify RHS levels for OR coefficient extraction

#for each model and var, extract and save OR values
for (var in vars) { 
  #add demographic controls
  logit_1<-svyglm(as.formula(paste0(var, "~", "culture_level + Age_Group + Gender_Group")), family=quasibinomial, design=CS.d, na.action=na.omit)
  logit_1<-exp(cbind(OR = stats::coef(logit_1), confint(logit_1)))
  logit_1<-as.data.frame(logit_1)
  setDT(logit_1, keep.rownames = "Outcome")
  write.csv(logit_1,file=paste0(pathdir,var,"_","Fig4_demo_logit.csv"))

  #add geographic controls
  logit_2<-svyglm(as.formula(paste0(var, "~", "culture_level + Age_Group + Gender_Group + Region")), family=quasibinomial, design=CS.d, na.action=na.omit)
  logit_2<-exp(cbind(OR = stats::coef(logit_2), confint(logit_2)))
  logit_2<-as.data.frame(logit_2)
  setDT(logit_2, keep.rownames = "Outcome")
  write.csv(logit_2,file=paste0(pathdir,var,"_","Fig4_demo_geo_logit.csv"))
  
  #alternative full model specification with cultural subfactors 
  logit_3<-svyglm(as.formula(paste0(var, "~", "Age_Group + Gender_Group + Town_Settlement + postpri_education + GL_identity + subsistence_occupation")), family=quasibinomial, design=CS.d, na.action=na.omit)
  logit_3<-exp(cbind(OR = stats::coef(logit_3), confint(logit_3)))
  logit_3<-as.data.frame(logit_3)
  setDT(logit_3, keep.rownames = "Outcome")
  write.csv(logit_3,file=paste0(pathdir,var,"_","Fig4_demo_full_logit.csv"))
  
  #alternative full model specification with cultural subfactors add geographic controls
  logit_4<-svyglm(as.formula(paste0(var, "~", "Age_Group + Gender_Group + Region + Town_Settlement + postpri_education + GL_identity + subsistence_occupation")), family=quasibinomial, design=CS.d, na.action=na.omit)
  logit_4<-exp(cbind(OR = stats::coef(logit_4), confint(logit_4)))
  logit_4<-as.data.frame(logit_4)
  setDT(logit_4, keep.rownames = "Outcome")
  write.csv(logit_4,file=paste0(pathdir,var,"_","Fig4_demo_geo_full_logit.csv"))
}
#I. OR Plots for full model + culture factor levels
#load extracted data  
for (var in vars) {
  for (var_2 in vars_2) {
  #load logit ORs for plotting composite model plot
  #add demographic controls
  logit_1<-fread(paste0(pathdir,var,"_","Fig4_demo_logit.csv"),header=T)
  setnames(logit_1,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_1<-logit_1[Outcome%like%var_2] #select OR coefficient
  logit_1[Outcome%like%var_2,Outcome:=paste0("demo_",Outcome)] #add var and model to outcome
  logit_1[,group:=paste0(var)] #add variable grouping var

  #add geographic controls
  logit_2<-fread(paste0(pathdir,var,"_","Fig4_demo_geo_logit.csv"),header=T)
  setnames(logit_2,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_2<-logit_2[Outcome%like%var_2] #select outcome
  logit_2[Outcome%like%var_2,Outcome:=paste0("demo_geo_",Outcome)] #add var and model to outcome
  logit_2[,group:=paste0(var)] #add variable grouping var
  logit<-rbindlist(list(logit_1,logit_2),use.names = T, fill = T)
  logit[,rnum:=.I] #add row number to order outcomes on x axis
  logit[,Outcome:=paste0(rnum,"_",Outcome)]
  
  #rename group col
  #line + dot plot
  logit_plot<-ggplot(logit) +
  geom_pointrange(aes(x=OR, y=Outcome, xmin=lower, xmax=upper, color=factor(Outcome)), alpha=0.9, size=.75) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  theme(panel.background = element_blank()) + scale_x_continuous(breaks=seq(.5, 5, .5)) + coord_trans(x = "log10") + expand_limits(x=c(.2, 5.25)) + theme(aspect.ratio=1, axis.text.y=element_blank(), legend.text=element_text(size=7)) + ggtitle(paste0(var, "_", var_2))
  logit_plot

  #save plots 
  write.csv(logit, file = paste0(pathdir,"Figure_components/Fig4_logit_plot_",var,"_",var_2,".csv"))
  ggsave(paste0(pathdir,"Figure_components/Fig4_logit_plot_",var,"_",var_2,".pdf"))
  ggsave(paste0(pathdir,"Figure_components/Fig4_logit_plot_",var,"_",var_2,".png"))
  logit_plot
  }}

#II. Plot odds ratios for full model subcultural factors
#specify model outcomes for multi-model odds ratio plot
vars <- c("AACC","experience") #DVs
vars_2 <- c("Town_Settlement","postpri_education","GL_identity","subsistence_occupation") #specify RHS levels for OR coefficient extraction

for (var in vars) {
  for (var_2 in vars_2) {
  #load logit ORs for plotting composite full model plot
  #add demographic controls to full model
  logit_3<-fread(paste0(pathdir,var,"_","Fig4_demo_full_logit.csv"),header=T)
  setnames(logit_3,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_3<-logit_3[Outcome%like%var_2] #select OR coefficient
  logit_3[Outcome%like%var_2,Outcome:=paste0("demo_full",Outcome)] #add var and model to outcome
  logit_3[,group:=paste0(var)] #add variable grouping var

  #add geographic controls to full model
  logit_4<-fread(paste0(pathdir,var,"_","Fig4_demo_geo_full_logit.csv"),header=T)
  setnames(logit_4,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_4<-logit_4[Outcome%like%var_2] #select outcome
  logit_4[Outcome%like%var_2,Outcome:=paste0("demo_geo_full",Outcome)] #add var and model to outcome
  logit_4[,group:=paste0(var)] #add variable grouping var
  logit<-rbindlist(list(logit_3,logit_4),use.names = T, fill = T)
  logit[,rnum:=.I] #add row number to order outcomes on x axis
  logit[,Outcome:=paste0(rnum,"_",Outcome)]
  
  #rename group col
  #line + dot plot
  logit_plot<-ggplot(logit) +
  geom_pointrange(aes(x=OR, y=Outcome, xmin=lower, xmax=upper, color=factor(Outcome)), alpha=0.9, size=.75) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  theme(panel.background = element_blank()) + scale_x_continuous(breaks=seq(.5, 5, .5)) + coord_trans(x = "log10") + expand_limits(x=c(.2, 5.25)) + theme(aspect.ratio=1, axis.text.y=element_blank(), legend.text=element_text(size=7)) + ggtitle(paste0(var, "_", var_2))
  logit_plot

  #save plots 
  write.csv(logit, file = paste0(pathdir,"Figure_components/Fig4_logit_plot_full",var,"_",var_2,".csv"))
  ggsave(paste0(pathdir,"Figure_components/Fig4_logit_plot_full",var,"_",var_2,".pdf"))
  ggsave(paste0(pathdir,"Figure_components/Fig4_logit_plot_full",var,"_",var_2,".png"))
  logit_plot
  }}

#plots formatted in Adobe Creative Suite
```


